{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "\n",
    "# Single article\n",
    "def get_detail(url):\n",
    "    \"\"\"Fetch one article page and extract its main fields.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the article page to download.\n",
    "\n",
    "    Returns:\n",
    "        dict with the keys 'title', 'dt', 'author', 'article' and 'tag'.\n",
    "        'article' is the text of every '.entry-content p' element joined\n",
    "        with single spaces; 'tag' is a list holding the text of each\n",
    "        '.cat-links a' element.\n",
    "\n",
    "    Raises:\n",
    "        IndexError: if the page has no '.entry-title', '.entry-date' or\n",
    "            '.author-name' element.\n",
    "    \"\"\"\n",
    "    result = {}\n",
    "    r = requests.get(url)\n",
    "    soup = BeautifulSoup(r.text, 'html.parser')\n",
    "    # Title, date and author each take the first element matching the selector.\n",
    "    result['title'] = soup.select('.entry-title')[0].text\n",
    "    result['dt'] = soup.select('.entry-date')[0].text\n",
    "    result['author'] = soup.select('.author-name')[0].text\n",
    "    result['article'] = ' '.join([p.text for p in soup.select('.entry-content p')])\n",
    "    # Tags belong in the same dict that is returned to the caller.\n",
    "    result['tag'] = [tag.text for tag in soup.select('.cat-links a')]\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Index page\n",
    "def parse_link_list(url):\n",
    "    \"\"\"Download an index page and return the details of every article it links to.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the index (listing) page.\n",
    "\n",
    "    Returns:\n",
    "        list of the values returned by get_detail, one per '.entry-title a'\n",
    "        link found on the page, in page order.\n",
    "    \"\"\"\n",
    "    response = requests.get(url)\n",
    "    page = BeautifulSoup(response.text, 'html.parser')\n",
    "    # Gather the article links shown on the index page\n",
    "    hrefs = [anchor['href'] for anchor in page.select('.entry-title a')]\n",
    "    return [get_detail(href) for href in hrefs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Multiple pages\n",
    "# URL template; the {} placeholder is filled with the page number.\n",
    "page_url = 'http://www.crazyant.net/page/{}'\n",
    "# Accumulates the article detail dicts from every crawled page.\n",
    "article_total = [] \n",
    "# range(1, 3) visits pages 1 and 2.\n",
    "for i in range(1, 3):   \n",
    "    url = page_url.format(i)\n",
    "    # One list of article details per index page.\n",
    "    article_array = parse_link_list(url)   \n",
    "    article_total.extend(article_array)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
